Study area
The focus area for this project is coastal Maine, extending from the coast to the eastern boundary of the NOAA statistical areas 511, 512, 513.
usStates <- rnaturalearth::ne_states("united states of america", returnclass = "sf")
ne_us <- usStates %>% filter(name == "Maine")
statarea <- sf::st_read(paste0(gmRi::shared.path(group = "Res_Data", folder = "Shapefiles/Statistical_Areas"), "Statistical_Areas_2010_withnames.shp"), quiet = TRUE) %>%
filter(Id %in% c(511, 512, 513)) %>%
mutate(Id = as.factor(Id))
statarea_sf <- sf::st_simplify(statarea, dTolerance = .05)
mcc_turnoff_sf <- sf::st_read(here::here("Data/Shapefiles/MCC_turnoff/MCC_turnoff.shp"), quiet = TRUE)
ggplot() +
geom_sf(data = statarea_sf, aes(fill = Id)) +
geom_sf(data= ne_us, fill = "grey") +
geom_sf(data = mcc_turnoff_sf, fill = "transparent", color = "black") +
scale_fill_discrete(name = "Stat area") +
theme(panel.background = element_blank(),
panel.grid = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank())
Indicators
The following indicators are used in this report
- Surface and bottom temperature (FVCOM)
- Surface and bottom salinity (FVCOM)
- Maine Coastal Current Index (FVCOM)
- Species-based lobster predator index (NOAA Trawl Survey)
# Indicators
FVCOM <- read_csv(here::here("Indicators/FVCOM_stat_area_temps.csv")) %>%
mutate(Year = Year -1)
sal <- read_csv(here::here("Indicators/FVCOM_stat_area_sal.csv")) %>%
mutate(Year = Year -1)
maineCC <- read_csv(here::here("Indicators/mcc_pca_pc1_2.csv")) %>%
rename("Year" = yr, "Month" = mon) %>%
mutate(Year = Year -1)
species_index <- read_rds(here::here("Indicators/nmfs_trawl_lobster_predator_indices.rdata")) %>%
rename("Year" = year) %>%
mutate(Year = Year -1)
mcc_turnoff_subset <- read_csv(here::here("Indicators/mcc_turnoff_subset.csv")) %>%
mutate(Year = Year -1)
cprData <- read_csv(here::here("Indicators/cpr_focal_pca_timeseries_period_1961-20017.csv")) %>%
mutate(`First Mode` = `First Mode`*-1) %>%
rename("Year" = year,
"FirstMode" = `First Mode`,
"SecondMode" = `Second Mode`) %>%
select(Year, FirstMode, SecondMode) %>%
mutate(Year = Year -1)
Temperature
Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface and bottom layer (1 m above bathymetry) * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area
yrFVCOM <- FVCOM %>%
filter(Month %in% season) %>%
group_by(Year, stat_area) %>%
summarise(sur_temp = mean(sur_temp),
bot_temp = mean(bot_temp), .groups="drop") %>%
mutate(stat_area = as.factor(stat_area))
tPlot1 <- yrFVCOM %>%
ggplot() +
geom_line(aes(Year, sur_temp, col = stat_area)) +
theme_bw() +
labs(y = "SST (deg C)")
tPlot2 <- yrFVCOM %>%
ggplot() +
geom_line(aes(Year, bot_temp, col = stat_area)) +
theme_bw() +
labs(y = "Bottom T (deg C)")
tPlot1/tPlot2

Salinity
Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface and bottom layer (1 m above bathymetry) * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area for each year
yrSal <- sal %>%
filter(Month %in% season) %>%
group_by(Year, stat_area) %>%
summarise(sur_sal = mean(sur_sal),
bot_sal = mean(bot_sal)) %>%
mutate(stat_area = as.factor(stat_area))
sPlot1 <- yrSal %>%
ggplot() +
geom_line(aes(Year, sur_sal, col = stat_area)) +
theme_bw() +
labs(y = "SSS (deg C)")
sPlot2 <- yrSal %>%
ggplot() +
geom_line(aes(Year, bot_sal, col = stat_area)) +
theme_bw() +
labs(y = "Bottom S (deg C)")
sPlot1/sPlot2

Maine Coastal Current Index
Source: FVCOM NECOFS Monthly Means Thredds Link Methods: * Surface layer * Crop to Maine Coastal Current interest area * Regridded to regular 0.1 deg grid * Averaged over NOAA statistical area for each year * See MCC_index_report.Rmd for details
yrMCC <- maineCC %>%
filter(Month %in% season) %>%
group_by(Year) %>%
summarise(MCCPC1 = mean(PC1))
MCCPC_plot <- yrMCC %>%
mutate(c = ifelse(MCCPC1 > 0, 1, 2)) %>%
ggplot() + geom_col(aes(x = Year, y = MCCPC1, fill = as.factor(c))) + theme_bw() + theme(legend.position = "none") + labs(x = "Year", y = "Maine Coastal Current PC1")
MCCPC_maps <- mcc_turnoff_subset %>%
mutate(Year = lubridate::year(as.Date(Date)),
Month = lubridate::month(as.Date(Date))) %>%
filter(Year %in% c(1982, 1992, 1995, 2001, 2012, 2014),
Month %in% c(5,6,7,8)) %>%
group_by(Year, lat, lon) %>%
summarise(u = mean(u),
v = mean(v), .groups = "drop") %>%
mutate(vel = sqrt(u^2+v^2),
PC = if_else(Year == 2010, "negative PC1", if_else(Year ==2013, "positive PC1", "neutral PC1"))) %>%
ggplot() + geom_sf(data= ne_us, fill = "grey") +
geom_segment(aes(x = lon, y = lat, xend=lon+u, yend=lat+v, color = vel),
arrow = arrow(angle = 10, length = unit(0.05, "inches"), type = "closed")) +
scale_color_viridis_c() + theme_bw() +
coord_sf(datum = "+proj=longlat +datum=NAD83 +no_defs +ellps=GRS80 +towgs84=0,0,0") +
facet_wrap(~Year, nrow = 1)+
theme(panel.background = element_blank(), panel.grid = element_blank(),
axis.title = element_blank(), axis.text = element_blank(),
axis.ticks = element_blank())
MCCPC_plot / MCCPC_maps

Species Based Predator Index
Source: NOAA NE Fisheries Trawl Survey Methods: * Filtered to 15 lobster predators (ASMFC 2020) * Cropped to NOAA stat areas 511, 512, 513 * Stratum abundance: abundance per trawl summed over stat area * Stratified abundance: abundance / km2 multiplied by the area of the strata
yrSpecies_index <-
species_index %>%
filter(season == "Spring",
`stratum id` != 'Strata 511-513') %>%
dplyr::select(Year, `stratum id`, "predators" = `stratified abundance`)
yrSpecies_index %>%
ggplot() +
geom_line(aes(Year, predators, color = `stratum id` )) + theme_bw()

CPR data
Source: Continuous Plankton Recording
cprData %>%
ggplot() +
geom_line(aes(Year, FirstMode)) +
geom_line(aes(Year, SecondMode), color = "red") + theme_bw()

allIndex <- left_join(yrFVCOM, yrSal, by = c("Year", "stat_area")) %>%
left_join(yrMCC, by = c("Year" = "Year")) %>%
left_join(yrSpecies_index, by = c("Year" = "Year",
"stat_area" = "stratum id")) %>%
left_join(cprData, by = "Year") %>%
na.omit()
Indicators PCA
Data: SST, BT, SSS, BS, MCC, Predator Index Method: Principal components analysis Years: 1990-2016
PCA Summary
# PCA by stat area
indicator_pca <- allIndex %>%
nest(data = c(-stat_area, -Year),
Year = Year) %>%
mutate(pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
"PC2" = .x$x[,2],
"PC3" = .x$x[,3])))
index_pca <- indicator_pca %>%
unnest(c(pca_index, Year, data))
PCA511 <- data.frame(summary(indicator_pca$pca[[1]])$importance) %>%
rownames_to_column("Result") %>% mutate(stat_area = "511")
PCA512 <- data.frame(summary(indicator_pca$pca[[2]])$importance) %>%
rownames_to_column("Result") %>% mutate(stat_area = "512")
PCA513 <- data.frame(summary(indicator_pca$pca[[3]])$importance) %>%
rownames_to_column("Result") %>% mutate(stat_area = "513")
PCA_table <- bind_rows(PCA511,PCA512,PCA513)
knitr::kable(PCA_table)
| Standard deviation |
1.901027 |
1.305180 |
0.9965156 |
0.8122849 |
0.7891246 |
0.5369032 |
0.2836144 |
0.1957803 |
511 |
| Proportion of Variance |
0.451740 |
0.212940 |
0.1241300 |
0.0824800 |
0.0778400 |
0.0360300 |
0.0100500 |
0.0047900 |
511 |
| Cumulative Proportion |
0.451740 |
0.664680 |
0.7888100 |
0.8712800 |
0.9491200 |
0.9851500 |
0.9952100 |
1.0000000 |
511 |
| Standard deviation |
1.835508 |
1.312145 |
1.0022133 |
0.8565829 |
0.8166847 |
0.6173233 |
0.2653454 |
0.2292375 |
512 |
| Proportion of Variance |
0.421140 |
0.215220 |
0.1255500 |
0.0917200 |
0.0833700 |
0.0476400 |
0.0088000 |
0.0065700 |
512 |
| Cumulative Proportion |
0.421140 |
0.636350 |
0.7619100 |
0.8536200 |
0.9369900 |
0.9846300 |
0.9934300 |
1.0000000 |
512 |
| Standard deviation |
1.612874 |
1.352696 |
1.1286219 |
0.9274575 |
0.8711510 |
0.6376812 |
0.4330075 |
0.2860938 |
513 |
| Proportion of Variance |
0.325170 |
0.228720 |
0.1592200 |
0.1075200 |
0.0948600 |
0.0508300 |
0.0234400 |
0.0102300 |
513 |
| Cumulative Proportion |
0.325170 |
0.553890 |
0.7131200 |
0.8206400 |
0.9155000 |
0.9663300 |
0.9897700 |
1.0000000 |
513 |
Scree Plots
scree1 <- fviz_eig(indicator_pca$pca[[1]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree2 <- fviz_eig(indicator_pca$pca[[2]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree3 <- fviz_eig(indicator_pca$pca[[3]], ylab = "% variance explained") + scale_y_continuous(limits = c(0,60))
scree1 / scree2 / scree3

PC time series
PC1plot <- index_pca %>%
ggplot() +
geom_line(aes(Year, PC1, color = stat_area))
PC2plot <- index_pca %>%
ggplot() +
geom_line(aes(Year, PC2, color = stat_area))
PC3plot <- index_pca %>%
ggplot() +
geom_line(aes(Year, PC3, color = stat_area))
PC1plot / PC2plot / PC3plot

Global PCA
# Global PCA
globalPCA <- allIndex %>%
group_by(Year) %>%
pivot_wider(names_from = stat_area,
values_from = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators)) %>%
nest(data = c(-Year),
Year = Year) %>%
mutate(pca = map(data, ~prcomp(.x, scale. = TRUE, center = TRUE)),
pca_index = map(pca, ~tibble("PC1" = .x$x[,1],
"PC2" = .x$x[,2],
"PC3" = .x$x[,3])))
index_globalPCA <- globalPCA %>%
unnest(c(pca_index, Year, data))
globalPCA_table <- data.frame(summary(globalPCA$pca[[1]])$importance) %>%
rownames_to_column("Result")
knitr::kable(globalPCA_table)
| Standard deviation |
2.992895 |
1.937734 |
1.67806 |
1.181539 |
1.016098 |
0.7975698 |
0.6537792 |
0.5398169 |
0.4653806 |
0.3806505 |
0.3552168 |
0.2813712 |
0.2404182 |
0.2023516 |
0.1249607 |
0.0663937 |
0.0465431 |
0.0261327 |
0 |
0 |
| Proportion of Variance |
0.447870 |
0.187740 |
0.14079 |
0.069800 |
0.051620 |
0.0318100 |
0.0213700 |
0.0145700 |
0.0108300 |
0.0072400 |
0.0063100 |
0.0039600 |
0.0028900 |
0.0020500 |
0.0007800 |
0.0002200 |
0.0001100 |
0.0000300 |
0 |
0 |
| Cumulative Proportion |
0.447870 |
0.635610 |
0.77641 |
0.846210 |
0.897830 |
0.9296400 |
0.9510100 |
0.9655800 |
0.9764100 |
0.9836500 |
0.9899600 |
0.9939200 |
0.9968100 |
0.9988600 |
0.9996400 |
0.9998600 |
0.9999700 |
1.0000000 |
1 |
1 |
# GlobaPCA
fviz_pca_var(globalPCA$pca[[1]],
col.var = "contrib",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
repel = TRUE) + labs(title = "global PCA") +
theme(legend.position = "none")

Chronolgical cluster
allIndex_ca <- allIndex %>%
group_by(Year) %>%
pivot_wider(names_from = stat_area,
values_from = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators)) %>%
column_to_rownames("Year")
allIndex_ca <- scale(allIndex_ca)
# determine number of clusters
wss <- (nrow(allIndex_ca)-1)*sum(apply(allIndex_ca,2,var)) # get sum of squares
for (i in 2:12) wss[i] <- sum(kmeans(allIndex_ca,
centers=i)$withinss)
# look for break in plot like a scree plot
kmeans_scree <- ggplot() +
geom_line(aes(x = c(1:12), y = wss)) +
labs(x = "Number of Clusters",
y ="Within groups sum of squares")
# K-Means Cluster Analysis
fit <- kmeans(allIndex_ca, 3) # 4 cluster solution
# get cluster means
#aggregate(allIndex_ca,by=list(fit$cluster),FUN=mean)
# append cluster assignment
lobData_cluster <- data.frame(allIndex_ca, fit$cluster)
# Cluster Plot against 1st 2 principal components
# vary parameters for most readable graph
indicators_cluster <- cluster::clusplot(allIndex_ca, fit$cluster, color=TRUE, shade=TRUE,
labels=2, lines=0)

indicators_cluster
## $Distances
## NULL
##
## $Shading
## [1] 19.36983 13.47819 13.15198
Breakpoint analysis
Breakpoint analysis of indicators
# breapoint function
bp_analysis <- function(x){
mod <- lm(values ~ Year, data = x)
o <- tryCatch(segmented::segmented(mod, seg.Z = ~Year, psi = c(2005)), # need to estimate bp
error = function(cond){cond})
}
# Breakpoint by stat area
indicator_breakpoint <- allIndex %>%
pivot_longer(cols = c(sur_temp, bot_temp, sur_sal, bot_sal, MCCPC1, predators),
names_to = "Indicator", values_to = "values") %>%
nest(data = c(-stat_area, -Indicator)) %>%
mutate(seg = purrr::map(data, ~bp_analysis(.x)),
bp = purrr::map(seg, ~data.frame(.x$psi)))
# get fitted values and plot over data
bp_plots <- list()
for(i in 1:length(indicator_breakpoint$data)){
name <- paste0(indicator_breakpoint$stat_area[[i]], "_", indicator_breakpoint$Indicator[[i]])
# get the fitted data
fitted_df <- fitted(indicator_breakpoint$seg[[i]])
if(is.null(fitted_df)){
"check error log"
} else {
seg_model <- data.frame(Year = indicator_breakpoint$data[[i]]$Year, values = fitted_df)
# plot the fitted model
bp_plots[[name]] <- indicator_breakpoint$data[[i]] %>%
ggplot() + geom_line(aes(Year, values)) +
geom_line(data = seg_model, aes(x = Year, y = values), col = "dark red")
}
}
set.seed(1)
# extract the breakpoints
break_years <- indicator_breakpoint %>%
unnest(bp) %>%
select(stat_area, Indicator, Est.) %>%
mutate(Indicator = factor(Indicator, levels = c("MCCPC1", "predators", "sur_sal", "sur_temp", "bot_sal", "bot_temp")))
# plot break points
ggplot(break_years) +
geom_point(aes(Est., Indicator, color = stat_area, shape = stat_area), size = 3) +
scale_color_grey(name = "Stat area") +
scale_shape(name = "Stat area") +
labs(x = "Estimated breakpoint")

Brakpoint analysis of PC1 and PC2
set.seed(8)
# Breakpoint by stat area
pc1_breakpoint <- index_pca %>%
select(stat_area, Year, PC1, PC2) %>%
pivot_longer(cols = c(PC1, PC2),
names_to = "Indicator", values_to = "values") %>%
nest(data = c(-stat_area, -Indicator))%>%
mutate(seg = purrr::map(data, ~bp_analysis(.x)),
bp = purrr::map(seg, ~data.frame(.x$psi))) %>%
unnest(bp) %>%
select(stat_area, Indicator, Est., St.Err)
knitr::kable(pc1_breakpoint)
| 511 |
PC1 |
2006.276 |
1.520431 |
| 511 |
PC2 |
1996.371 |
4.341091 |
| 512 |
PC1 |
2006.070 |
1.770614 |
| 512 |
PC2 |
1996.000 |
4.377386 |
| 513 |
PC1 |
2006.000 |
1.946857 |
| 513 |
PC2 |
1997.000 |
4.286314 |
Lobster Data
ALSI <- read_csv(here::here("Biological_data/SettlementIndex.csv"))
ME_landings <- read_csv(here::here("Biological_data/ME_lob_landings_1950_2019.csv"))
ASFMCindicies <- read_csv(here::here("Biological_data/GOMGBK_indices.csv")) %>%
rename("lob_index" = Index, "name" = Survey)
GOMindex <- ASFMCindicies %>%
filter(name %in% c("MeFQ2", "MeMQ2"))
NEFSCindex <- ASFMCindicies %>%
filter(name %in% c("NefscFQ2", "NefscMQ2"))
sublegal <- readxl::read_excel(here::here("Biological_data/RawData.xlsx")) %>%
rename("Year" = year, "Month" = month, "stat_area" = `stat area`)
NEFSC_meIndex <- read_csv(here::here("Biological_data/nmfs_trawl_lobster_indices.csv")) %>%
rename("Year" = year, "stat_area" = `stratum id`)
ALSI index
Source: American Lobster Settlement Index data portal Sites:
- Jonesport, Length: 2002-2018, stat area: 511
- Mt. Desert Island, Length: 2000-2018, stat area: 512
- Outer Penobscot Bay, Length: 2000-2018, stat area: 512
- Mid-coast, Length: 1989-2018, stat area: 513
- Casco Bay, Length: 2000-2018, stat area: 513
- York, Length: 2000-2018, stat area: 513
Methods:
- Sites grouped by NOAA stat area
- Time series cropped to shortest length
- Averaged by stat area
statKey <- data.frame("Location" = c('Jonesport',
'Mt. Desert Island',
'Outer Pen Bay',
'Mid-coast',
'Casco Bay',
'York'), "stat_area" = c(511,512,512,513,513,513))
ALSI <- ALSI %>%
left_join(statKey, by = "Location") %>%
filter(!is.na(stat_area),
Year >= 2000) %>%
group_by(stat_area, Year) %>%
summarise(lob_index = mean(Yoy_density), .groups = "drop") %>%
mutate(stat_area = as.factor(stat_area),
name = "ALSI") %>%
na.omit()
ALSI_cors <- ALSI %>%
left_join(index_pca, by = c("Year", "stat_area")) %>%
group_by(stat_area, name) %>%
summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
corPC2 = corrr::correlate(lob_index, PC2)[[2]],
corPC3 = corrr::correlate(lob_index, PC3)[[2]])
ALSI_plot <- ALSI %>%
ggplot() +
geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
scale_color_discrete(name = "Stat area") +
labs(y = "ALSI yoy density")
ALSI_plot

Sublegal index
Calculate stratified means
- Calculate catch per unit effort for each site for each year
- Multiply cpue by the depth strata area factor
- Group by stat area, sum the outputs
strata_area_factor <- tibble("stat_area" = c(511,512,513),
"1" = c(0.412162162, 0.409847936, 0.370152761),
"2" = c(0.277027027, 0.28602462, 0.397179788),
"3" = c(0.310810811, 0.304127444, 0.23266745)) %>%
pivot_longer(cols = c("1", "2", "3"), names_to = "depth stratum", values_to = "strata_scale") %>%
mutate(`depth stratum` = as.numeric(`depth stratum`))
sublegal_cpue <- sublegal %>%
group_by(`trip ID`, `trip date`, Year, Month, `site ID`,
`depth stratum`, stat_area,
`soak nights`, depth, `depth unit`,
`latitude (dd)`, `longitude (dd)`, `effort ID`) %>%
mutate(lob_count = sum(`sample ID` != 0),
lob_effort = lob_count/`soak nights`) %>%
group_by(`trip ID`, `trip date`, Year, Month,
`site ID`, `depth stratum`, stat_area,
`soak nights`, depth, `depth unit`,
`latitude (dd)`, `longitude (dd)`) %>%
summarise(cpue = sum(lob_effort)/sum(!is.na(unique(`effort ID`))), .groups = "drop") %>%
left_join(strata_area_factor, by = c("stat_area", "depth stratum")) %>%
mutate(stratified_cpue = cpue*strata_scale,
stat_area = as.factor(stat_area)) %>%
group_by(stat_area, Year) %>%
summarise(lob_index = sum(stratified_cpue), .groups = "drop") %>%
mutate(name = "sublegal_cpue")
sub_leagal_cors <- sublegal_cpue %>%
left_join(index_pca, by = c("Year", "stat_area")) %>%
group_by(stat_area, name) %>%
summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
corPC2 = corrr::correlate(lob_index, PC2)[[2]],
corPC3 = corrr::correlate(lob_index, PC3)[[2]])
sublegal_plot <- sublegal_cpue %>%
ggplot() +
geom_line(aes(Year, lob_index, col = as.factor(stat_area))) +
scale_color_discrete(name = "Stat area") +
labs(y = "Sublegal lobster cpue")
sublegal_plot

ME yearly landings
Yearly Maine lobster landings in pounds from 1950-2020.
ME_landings_cors <- ME_landings %>%
left_join(index_pca, by = c("Year")) %>%
group_by(stat_area) %>%
summarise(corPC1 = corrr::correlate(Pounds, PC1)[[2]],
corPC2 = corrr::correlate(Pounds, PC2)[[2]],
corPC3 = corrr::correlate(Pounds, PC3)[[2]])
ME_pounds <- ME_landings %>%
mutate(name = "ME_landings") %>%
select(Year, "lob_index" = Pounds, name)
ME_pounds %>%
ggplot() +
geom_line(aes(Year, lob_index)) +
labs(y = "Maine lobster landings (pounds)")

ME trawl index
ASFMC lobster abundance index based on the Maine trawl survey. Time spans 2003-2018 and indices are split into seasons and sex.
GOM_cors <- GOMindex %>%
left_join(index_pca, by = "Year") %>%
group_by(stat_area, name) %>%
summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
corPC2 = corrr::correlate(lob_index, PC2)[[2]],
corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")
GOMindex %>%
ggplot() +
geom_line(aes(Year, lob_index)) +
facet_wrap(~name) +
labs(y = "ME Trawl lob abundance Index")

Nefsc index
ASFMC lobster abundance index based on the NE fisheries trawl survey. Time spans 1978-2018 and indices are split into seasons and sex.
NEFSC_cors <- NEFSCindex %>%
left_join(index_pca, by = "Year") %>%
group_by(stat_area, name) %>%
summarise(corPC1 = corrr::correlate(lob_index, PC1)[[2]],
corPC2 = corrr::correlate(lob_index, PC2)[[2]],
corPC3 = corrr::correlate(lob_index, PC3)[[2]], .groups = "drop")
NEFSCindex %>%
ggplot() +
geom_line(aes(Year, lob_index)) +
facet_wrap(~name) +
labs(y = "Nefsc trawl lob abundance index")

Biological data analysis
xyPlot <- function(x){
x %>%
left_join(index_pca) %>%
na.omit() %>%
ggplot() +
geom_point(aes(PC1, lob_index, color = stat_area)) +
geom_smooth(aes(PC1, lob_index, color = stat_area), method = "lm", se = FALSE) +
facet_wrap(~name)
}
aicModel <- function(x){
x %>%
left_join(index_pca) %>%
na.omit() %>%
select(stat_area, lob_index, PC1, PC2, PC3, name) %>%
group_by(stat_area, name) %>%
nest() %>%
mutate(aic = purrr::map(data, ~MASS::stepAIC(object = lm(lob_index ~ PC1 + PC2, data = .x), direction = "both", trace = 0)),
stp = purrr::map(aic, broom::glance),
index = purrr::map(aic, ~as.character(.x$call$formula)),
model = paste(index[[1]][[2]], index[[1]][[1]], index[[1]][[3]])) %>%
select(stat_area, stp, model, name) %>%
unnest(c(stp, model))
}
##################################################
# break out trawl survey by stat area
# see what groups together to see if there is a distinction in the life history patterns
# Look at coefficients to see if PC1 is always the biggest
ALSI index
Correlation table
ALSI_cors %>%
na.omit() %>%
knitr::kable()
| 511 |
ALSI |
-0.2865149 |
0.1741885 |
-0.0550406 |
| 512 |
ALSI |
-0.2485525 |
0.0108428 |
-0.2707889 |
| 513 |
ALSI |
-0.4180668 |
0.2322178 |
-0.5501655 |
Scatter plot
xyPlot(ALSI)

AIC lm model selection table
aicSel <- aicModel(ALSI)
knitr::kable(aicSel)
| 511 |
0.0000000 |
0.0000000 |
0.4093072 |
NA |
NA |
NA |
-6.840335 |
17.68067 |
18.95878 |
2.177921 |
13 |
14 |
lob_index ~ 1 |
ALSI |
| 512 |
0.0000000 |
0.0000000 |
0.3432832 |
NA |
NA |
NA |
-5.079515 |
14.15903 |
15.70421 |
1.767650 |
15 |
16 |
lob_index ~ 1 |
ALSI |
| 513 |
0.1747798 |
0.1158355 |
0.4456658 |
2.965169 |
0.1070849 |
1 |
-8.703791 |
23.40758 |
25.72535 |
2.780652 |
14 |
16 |
lob_index ~ PC1 |
ALSI |
Subleagal index
Correlation table
sub_leagal_cors %>%
na.omit() %>%
knitr::kable()
| 511 |
sublegal_cpue |
0.5997732 |
0.0818308 |
0.0970885 |
| 512 |
sublegal_cpue |
0.8482231 |
0.0260158 |
-0.0875769 |
| 513 |
sublegal_cpue |
0.6622880 |
-0.0034103 |
0.4042346 |
Scatter plot
xyPlot(sublegal_cpue)

AIC lm model selection table
aicSel <- aicModel(sublegal_cpue)
knitr::kable(aicSel)
| 511 |
0.3597278 |
0.2796938 |
588.3786 |
4.494687 |
0.0668182 |
1 |
-76.84737 |
159.6947 |
160.6025 |
2769515 |
8 |
10 |
lob_index ~ PC1 |
sublegal_cpue |
| 512 |
0.7194824 |
0.6844177 |
1526.8310 |
20.518710 |
0.0019250 |
1 |
-86.38316 |
178.7663 |
179.6741 |
18649704 |
8 |
10 |
lob_index ~ PC1 |
sublegal_cpue |
| 513 |
0.4386254 |
0.3684535 |
942.6210 |
6.250733 |
0.0369336 |
1 |
-81.56031 |
169.1206 |
170.0284 |
7108274 |
8 |
10 |
lob_index ~ PC1 |
sublegal_cpue |
ME yearly landings
Correlation table
ME_landings_cors %>%
na.omit() %>%
knitr::kable()
| 511 |
0.6191539 |
-0.2215466 |
0.1403908 |
| 512 |
0.5888759 |
0.2676040 |
-0.2320589 |
| 513 |
0.5228336 |
-0.4517793 |
0.3534654 |
Scatter plot
xyPlot(ME_pounds)

AIC lm model selection table
aicSel <- aicModel(ME_pounds)
knitr::kable(aicSel)
| 511 |
0.4324345 |
0.3990483 |
26584322 |
12.95249 |
6.58e-05 |
2 |
-683.4822 |
1374.964 |
1381.408 |
2.402869e+16 |
34 |
37 |
lob_index ~ PC1 + PC2 |
ME_landings |
| 512 |
0.4183867 |
0.3841742 |
26911304 |
12.22904 |
9.97e-05 |
2 |
-683.9345 |
1375.869 |
1382.313 |
2.462342e+16 |
34 |
37 |
lob_index ~ PC1 + PC2 |
ME_landings |
| 513 |
0.4774595 |
0.4467218 |
25508068 |
15.53336 |
1.61e-05 |
2 |
-681.9531 |
1371.906 |
1378.350 |
2.212249e+16 |
34 |
37 |
lob_index ~ PC1 + PC2 |
ME_landings |
ME Trawl index
Correlation table
GOM_cors %>%
filter(name %in% c("MeFQ2", "MeFQ4", "MeMQ2", "MeMQ4")) %>%
na.omit() %>%
knitr::kable()
| 511 |
MeFQ2 |
0.8503890 |
-0.3114242 |
-0.1412119 |
| 511 |
MeMQ2 |
0.8694017 |
-0.3283094 |
-0.0944283 |
| 512 |
MeFQ2 |
0.8349252 |
0.0629934 |
-0.1333419 |
| 512 |
MeMQ2 |
0.8700507 |
0.0538142 |
-0.1017580 |
| 513 |
MeFQ2 |
0.8151903 |
-0.1060868 |
0.4868037 |
| 513 |
MeMQ2 |
0.8421315 |
-0.0901881 |
0.4704269 |
Scatter plot
xyPlot(GOMindex)

AIC lm model selection table
aicSel <- aicModel(GOMindex)
knitr::kable(aicSel)
| 511 |
0.7231615 |
0.6979944 |
14.25576 |
28.73436 |
0.0002301 |
1 |
-51.90344 |
109.8069 |
111.5017 |
2235.492 |
11 |
13 |
lob_index ~ PC1 |
MeFQ2 |
| 512 |
0.6971001 |
0.6695637 |
14.91168 |
25.31562 |
0.0003831 |
1 |
-52.48823 |
110.9765 |
112.6713 |
2445.941 |
11 |
13 |
lob_index ~ PC1 |
MeFQ2 |
| 513 |
0.6645352 |
0.6340384 |
15.69280 |
21.79032 |
0.0006847 |
1 |
-53.15198 |
112.3040 |
113.9988 |
2708.905 |
11 |
13 |
lob_index ~ PC1 |
MeFQ2 |
| 511 |
0.7558594 |
0.7336647 |
13.93372 |
34.05600 |
0.0001132 |
1 |
-51.60640 |
109.2128 |
110.9076 |
2135.633 |
11 |
13 |
lob_index ~ PC1 |
MeMQ2 |
| 512 |
0.7569882 |
0.7348962 |
13.90146 |
34.26529 |
0.0001103 |
1 |
-51.57627 |
109.1525 |
110.8474 |
2125.758 |
11 |
13 |
lob_index ~ PC1 |
MeMQ2 |
| 513 |
0.7091855 |
0.6827479 |
15.20740 |
26.82480 |
0.0003041 |
1 |
-52.74351 |
111.4870 |
113.1819 |
2543.914 |
11 |
13 |
lob_index ~ PC1 |
MeMQ2 |
NEFSC trawl index
Correlation table
NEFSC_cors %>%
filter(name %in% c("NefscFQ2", "NefscFQ4", "NefscMQ2", "NefscMQ4")) %>%
na.omit() %>%
knitr::kable()
| 511 |
NefscFQ2 |
0.6360510 |
-0.1742111 |
0.2511408 |
| 511 |
NefscMQ2 |
0.6037032 |
-0.2228239 |
0.3557187 |
| 512 |
NefscFQ2 |
0.6156753 |
0.1459083 |
-0.1618782 |
| 512 |
NefscMQ2 |
0.6359954 |
0.1370553 |
-0.0749031 |
| 513 |
NefscFQ2 |
0.5471681 |
-0.3412702 |
0.3701525 |
| 513 |
NefscMQ2 |
0.5714269 |
-0.3316981 |
0.3132527 |
Scatter plot
xyPlot(NEFSCindex)

AIC lm model selection table
aicSel <- aicModel(NEFSCindex)
knitr::kable(aicSel)
| 511 |
0.4045609 |
0.3875483 |
1.620595 |
23.78015 |
0.0000233 |
1 |
-69.33603 |
144.6721 |
149.5048 |
91.92145 |
35 |
37 |
lob_index ~ PC1 |
NefscFQ2 |
| 512 |
0.3790561 |
0.3613148 |
1.654939 |
21.36580 |
0.0000499 |
1 |
-70.11195 |
146.2239 |
151.0567 |
95.85878 |
35 |
37 |
lob_index ~ PC1 |
NefscFQ2 |
| 513 |
0.4158582 |
0.3814970 |
1.628581 |
12.10252 |
0.0001074 |
2 |
-68.98166 |
145.9633 |
152.4070 |
90.17741 |
34 |
37 |
lob_index ~ PC1 + PC2 |
NefscFQ2 |
| 511 |
0.4141080 |
0.3796438 |
1.186011 |
12.01559 |
0.0001130 |
2 |
-57.24845 |
122.4969 |
128.9406 |
47.82516 |
34 |
37 |
lob_index ~ PC1 + PC2 |
NefscMQ2 |
| 512 |
0.4044902 |
0.3874756 |
1.178501 |
23.77317 |
0.0000233 |
1 |
-57.54968 |
121.0994 |
125.9321 |
48.61024 |
35 |
37 |
lob_index ~ PC1 |
NefscMQ2 |
| 513 |
0.4365523 |
0.4034083 |
1.163072 |
13.17139 |
0.0000581 |
2 |
-56.52583 |
121.0517 |
127.4953 |
45.99308 |
34 |
37 |
lob_index ~ PC1 + PC2 |
NefscMQ2 |
Cluster Analysis
GOMindex_ca <- GOMindex %>%
filter(Season == 2) %>%
select(Year, lob_index, name)
NEFSCindex_ca <- NEFSCindex %>%
filter(Season == 2) %>%
select(Year, lob_index, name)
lobData <- bind_rows(ALSI, sublegal_cpue, ME_pounds, GOMindex_ca, NEFSCindex_ca) %>%
pivot_wider(names_from = c(name, stat_area),
values_from = lob_index) %>%
na.omit() %>%
column_to_rownames("Year")
# standardize variables
lobData <- scale(lobData) %>%
data.frame()
lobData_t <- data.table::transpose(lobData)
# get row and colnames in order
colnames(lobData_t) <- rownames(lobData)
rownames(lobData_t) <- colnames(lobData)
# determine number of clusters
wss <- (nrow(lobData)-1)*sum(apply(lobData,2,var)) # get sum of squares
for (i in 2:12) wss[i] <- sum(kmeans(lobData,
centers=i)$withinss)
# look for break in plot like a scree plot
plot(1:12, wss, type="b", xlab="Number of Clusters",
ylab="Within groups sum of squares")

fpc::pamk(lobData)
## $pamobject
## Medoids:
## ID ALSI_511 ALSI_512 ALSI_513 sublegal_cpue_511 sublegal_cpue_512
## 2008 3 0.2773898 0.3727694 0.8306286 -0.8219887 -0.9264162
## 2017 12 -0.8002855 -0.6249370 -0.5819441 0.6768536 1.1938697
## sublegal_cpue_513 ME_landings_NA MeFQ2_NA MeMQ2_NA NefscFQ2_NA
## 2008 -0.9486334 -1.4127242 -1.1595342 -1.166756 -1.141641
## 2017 0.8440615 0.3026508 0.9815946 0.931201 1.827133
## NefscMQ2_NA
## 2008 -1.345064
## 2017 1.080726
## Clustering vector:
## 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018
## 1 1 1 1 1 1 2 2 2 2 2 2 2
## Objective function:
## build swap
## 2.069411 1.959058
##
## Available components:
## [1] "medoids" "id.med" "clustering" "objective" "isolation"
## [6] "clusinfo" "silinfo" "diss" "call" "data"
##
## $nc
## [1] 2
##
## $crit
## [1] 0.0000000 0.4911826 0.3453511 0.2884169 0.2354232 0.2033810 0.1929705
## [8] 0.1457884 0.1594838 0.1417225
# K-Means Cluster Analysis
fit <- kmeans(lobData, 2) # 3 cluster solution
# get cluster means
aggregate(lobData,by=list(fit$cluster),FUN=mean)
## Group.1 ALSI_511 ALSI_512 ALSI_513 sublegal_cpue_511 sublegal_cpue_512
## 1 1 0.7126048 0.7845213 0.8609674 -0.7666360 -0.8196149
## 2 2 -0.6108041 -0.6724468 -0.7379720 0.6571166 0.7025270
## sublegal_cpue_513 ME_landings_NA MeFQ2_NA MeMQ2_NA NefscFQ2_NA
## 1 -0.8337142 -0.9249083 -0.8452135 -0.8586839 -0.9237534
## 2 0.7146122 0.7927785 0.7244687 0.7360148 0.7917886
## NefscMQ2_NA
## 1 -0.8807332
## 2 0.7549141
# append cluster assignment
lobData_cluster <- data.frame(lobData, fit$cluster)
# Cluster Plot against 1st 2 principal components
# vary parameters for most readable graph
cluster::clusplot(lobData, fit$cluster, color=TRUE, shade=TRUE,
labels=2, lines=0)

# Centroid Plot against 1st 2 discriminant functions
fpc::plotcluster(lobData, fit$cluster)
